#!/usr/bin/python
# -*- coding: utf-8 -*-
'''
@date: 2011-11-24
@author: shell.xu
'''
import os, sys, time, getopt
import traceback, unicodedata
import chardet, segment
from os import path

# Default database file name passed to segment.get_cutter(); the -d
# command-line option overrides it.
segdbname = 'frq.db'
# Default database file name passed to segment.Fisher() and
# Fisher.savefile(); the -f command-line option overrides it.
dbname = 'classify.db'

def walkdir(basepath, func, *params):
    ''' Call func(filepath, *params) for every file found below basepath.

    basepath : directory to walk recursively (via os.walk).
    func     : callable invoked once per file with the joined path.
    params   : extra positional arguments forwarded to func unchanged.

    An ordinary exception raised by func for one file is reported with
    traceback.print_exc() and the walk carries on with the next file.
    KeyboardInterrupt and SystemExit are deliberately not caught, so the
    walk can still be interrupted.
    '''
    for base, dirs, files in os.walk(basepath):
        for filename in files:
            try:
                func(path.join(base, filename), *params)
            except Exception:
                # Best effort: one bad file must not abort the whole walk.
                traceback.print_exc()

def create_fisher():
    ''' create_fisher : create fisher db. '''
    # The docstring above is printed verbatim as this command's usage text,
    # so it is kept to one line and leads with the exact command name.
    # A Fisher constructed without arguments is written out to dbname.
    fisher = segment.Fisher()
    fisher.savefile(dbname)

def train_fisher(hamdir, spamdir):
    ''' train_fisher hamdir spamdir : train fisher with ham and spam. '''
    # The docstring is printed as command-line help, so it stays one line.
    # Load the cutter first, then open the classifier stored in dbname and
    # have it tokenise with the cutter's parse method.
    cutter = segment.get_cutter(segdbname)
    classifier = segment.Fisher(dbname)
    classifier.set_segment(cutter.parse)
    # Feed every file under each directory to trainfile with its label,
    # ham directory first and spam directory second.
    for directory, label in ((hamdir, 'ham'), (spamdir, 'spam')):
        walkdir(directory, classifier.trainfile, label)
    classifier.sync()

def classify_fisher(*filepathes):
    ''' classify_fisher filepath .. : classify file. '''
    # The docstring is printed as command-line help, so it stays one line.
    # Same setup as training: cutter from segdbname, classifier from dbname.
    cutter = segment.get_cutter(segdbname)
    classifier = segment.Fisher(dbname)
    classifier.set_segment(cutter.parse)
    for target in filepathes:
        # A fresh category list is passed on every call.
        # NOTE(review): the return value of classifyfile is discarded here;
        # presumably it reports the result itself -- confirm.
        classifier.classifyfile(target, ['ham', 'spam'])

# Names of the module-level functions that may be run from the command line.
cmds = ['create_fisher', 'train_fisher', 'classify_fisher',]
def main():
    ''' Parse the command line and run the requested command.

    Options:
      -d FILE  replace the module-level segdbname.
      -f FILE  replace the module-level dbname.

    With no positional argument, print a usage line followed by the
    docstring of every command in cmds.  Otherwise the first positional
    argument names the command and the remaining ones are passed to it as
    positional parameters.

    Raises:
      SystemExit: the command name is not listed in cmds.
      getopt.GetoptError: propagated from getopt on an unknown option.
    '''
    global segdbname, dbname
    opts, args = getopt.getopt(sys.argv[1:], 'd:f:')
    for opt, val in opts:
        if opt == '-d':
            segdbname = val
        elif opt == '-f':
            dbname = val
    if not args:
        # Single-argument print(...) works both as a statement and as a
        # function call, so the output is the same on either interpreter.
        print('%s cmds params ...' % sys.argv[0])
        for name in cmds:
            print('%s:%s' % (name, globals()[name].__doc__))
        return
    name = args[0]
    # SECURITY: this used to be eval(args[0]), which executed any Python
    # expression given on the command line.  Only names listed in cmds are
    # dispatched now, looked up directly in the module namespace.
    if name not in cmds:
        sys.exit('unknown command: %s (expected one of: %s)'
                 % (name, ', '.join(cmds)))
    globals()[name](*args[1:])

if __name__ == '__main__': main()
